library(ggplot2)
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the Boulder social-media point shapefile into an sf object.
# NOTE(review): the path is absolute and machine-specific, so this call only
# works where the file exists at exactly this location.
boulder <- sf::st_read(
  dsn = "/Users/zhongyinjiao/Desktop/EAS648/Lab01/BoulderSocialMedia.shp"
)
## Reading layer `BoulderSocialMedia' from data source
## `/Users/zhongyinjiao/Desktop/EAS648/Lab01/BoulderSocialMedia.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 55519 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -788775 ymin: 1917813 xmax: -780555 ymax: 1930053
## Projected CRS: NAD_1983_Albers
# Auto-print the sf object to inspect its geometry type, bounding box, CRS
# and the first rows of its attribute table.
boulder
## Simple feature collection with 55519 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -788775 ymin: 1917813 xmax: -780555 ymax: 1930053
## Projected CRS: NAD_1983_Albers
## First 10 features:
## id DB extent Climb_dist TrailH_Dis NatMrk_Dis Trails_dis
## 1 6517284333 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 2 6517281191 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 3 6517278961 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 4 6517276295 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 5 6517274727 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 6 6517272539 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 7 6517270109 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 8 6516904527 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 9 6516902971 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## 10 6516900761 Flickr 421678.2 1973.108 2368.567 2451.633 49.73422
## Bike_dis PrarDg_Dis PT_Elev Hydro_dis Street_dis geometry
## 1 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 2 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 3 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 4 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 5 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 6 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 7 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 8 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 9 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
## 10 1437.134 1942.125 2064 1359.75 193.9165 POINT (-786099 1929916)
# Quick-look map of every feature in `boulder`; alpha = 0.2 draws the points
# semi-transparently.
ggplot(data = boulder) +
  geom_sf(fill = NA, alpha = 0.2) +
  theme_bw()
# Reproject the layer to EPSG:26753 and overwrite `boulder` with the result.
# NOTE(review): 26753 is presumably a Colorado state-plane CRS; confirm it is
# the intended target before relying on distances or areas.
boulder <- st_transform(boulder, crs = 26753)
# Draw the point map from the current `boulder` object (reprojected by the
# preceding st_transform() call), again with semi-transparent points.
ggplot(data = boulder) +
  geom_sf(fill = NA, alpha = 0.2) +
  theme_bw()
# Point map with the colour aesthetic mapped to the PT_Elev column, using
# ggplot2's default continuous colour scale.
ggplot(data = boulder, mapping = aes(colour = PT_Elev)) +
  geom_sf(fill = NA, alpha = 0.2) +
  theme_bw()
# Point map coloured by PT_Elev with a gradient built from ten
# terrain.colors() steps.
ggplot(data = boulder, mapping = aes(colour = PT_Elev)) +
  geom_sf(fill = NA, alpha = 0.2) +
  scale_colour_gradientn(colours = terrain.colors(n = 10)) +
  theme_bw()
# Flag points whose PT_Elev is at least 2200 and colour the map by that flag.
# The comparison already yields a logical vector (NA where PT_Elev is NA), so
# no ifelse(..., TRUE, FALSE) wrapper is needed.
boulder %>%
  mutate(high_elev = PT_Elev >= 2200) %>%
  ggplot() +
  geom_sf(aes(colour = high_elev), fill = NA, alpha = 0.2) +
  theme_bw()
# Compare the distribution of Street_dis between two data sources.
# %in% expresses set membership directly and never returns NA, so rows with a
# missing DB are dropped exactly as they were with the chained == / | test.
boulder %>%
  filter(DB %in% c("Pano", "Flickr")) %>%
  ggplot(aes(x = DB, y = Street_dis)) +
  geom_boxplot()
library(sf)
library(ggspatial)
library(viridis)
## Loading required package: viridisLite
# magma() returns a character vector of hexadecimal colour codes;
# its integer argument sets how many colours are generated.
magma(n = 10)
## [1] "#000004FF" "#180F3EFF" "#451077FF" "#721F81FF" "#9F2F7FFF" "#CD4071FF"
## [7] "#F1605DFF" "#FD9567FF" "#FEC98DFF" "#FCFDBFFF"
# Point map coloured by PT_Elev with a gradient built from ten magma()
# colours. No theme is added here, so ggplot2's default theme applies.
ggplot(data = boulder, mapping = aes(colour = PT_Elev)) +
  geom_sf(fill = NA, alpha = 0.2) +
  scale_colour_gradientn(colours = magma(n = 10))
# DB is a character column, so summary() reports only its length, class and
# mode rather than a count per data source.
summary(boulder[["DB"]])
## Length Class Mode
## 55519 character character
# Build a ggspatial map: `boulder` is added once as an annotation layer and
# once as a spatial layer whose colour is mapped to the data source (DB).
# The plot is stored in `p`, then rendered with the Brewer "Dark2" palette.
p <- ggplot() +
  annotation_spatial(data = boulder) +
  layer_spatial(data = boulder, mapping = aes(colour = DB))

p + scale_colour_brewer(palette = "Dark2")
library(tmap)
##
## Attaching package: 'tmap'
## The following object is masked from 'package:datasets':
##
## rivers
# Switch tmap to its static "plot" mode for the maps that follow.
tmap_mode(mode = "plot")
## ℹ tmap mode set to "plot".
# tm_shape() supplies the data; the layer function that follows is chosen to
# suit the data type (vector or raster). Inside the layer, choose the variable
# to map and, through the matching `*.scale` argument, the class intervals,
# palette and other scale options.
#
# In tmap v4 a scale is not a layer: tm_scale_intervals() must be passed to
# the `col.scale` argument of the layer it applies to rather than added with
# `+`. The number of classes is set with `n`; `breaks` is for explicit break
# values.
tm_shape(boulder) +
  tm_symbols(
    col = "PT_Elev",
    size = 0.1,
    col.scale = tm_scale_intervals(
      n = 5,
      style = "quantile",
      values = "brewer.yl_or_rd"
    )
  )
# Use the `World` example dataset for a simple country-level map.
# tmap_mode("plot")
data("World")
# Choropleth of estimated GDP per capita in quantile classes, written with
# the tmap v4 interface: classification goes in `fill.scale` and the legend
# title in `fill.legend`, instead of the v3-style `style` / `legend.title`
# arguments.
tm_shape(World) +
  tm_polygons(
    fill = "gdp_cap_est",
    fill.scale = tm_scale_intervals(style = "quantile"),
    fill.legend = tm_legend(title = "GDP Per Capita Estimate")
  )
# "view" mode renders subsequent tmap maps as interactive maps.
tmap_mode("view")
## ℹ tmap mode set to "view".
# Same GDP-per-capita choropleth, now interactive. Written with the tmap v4
# interface: classification goes in `fill.scale` and the legend title in
# `fill.legend`, instead of the v3-style `style` / `legend.title` arguments.
tm_shape(World) +
  tm_polygons(
    fill = "gdp_cap_est",
    fill.scale = tm_scale_intervals(style = "quantile"),
    fill.legend = tm_legend(title = "GDP Per Capita Estimate")
  )
Open data science emphasizes transparency by enabling anyone to access, analyze, and reuse scientific observations, data, and results. This openness lets other researchers replicate and validate findings, which is essential for building trust in scientific conclusions. In addition, open data allows datasets from multiple sources to be integrated, yielding new insights and discoveries that may not be possible with isolated datasets. For example, by integrating the spatial data in the social media shapefile with other environmental or population datasets, researchers can uncover patterns in urban development or environmental impacts that would remain hidden without cross-referencing the data.
However, open data science also brings challenges, including protecting data privacy and ensuring that shared data is well documented and interoperable. Sensitive information must be protected, especially when dealing with personal data or vulnerable populations. To reap the benefits of open data, researchers must format and annotate their data so that others can understand and use it, which requires additional time and effort. For example, the social media shapefile dataset must come with clear documentation of its structure, its data fields, any transformations applied to it, and its intended applications, so that other researchers can integrate and interpret it effectively, supporting reproducibility and potential new discoveries.